********************************************************************************
* This do-file generates the embodied-energy IO table and the energy intensities
*
*
* Edited: Aug 2020
********************************************************************************


*************************************************************************
* 				PART I. Declare inputs and outputs
*************************************************************************
// Drop every program currently in memory so the definitions in PART II
// can be (re)created when this do-file is run more than once per session.
clear programs

// ---------------------------- Inputs ----------------------------
// IO table. Hal subbed in the data generated from io_tables_to_stata.do.
global INPUT_OUTPUT_DATA "$IO/IxI_Domestic_2007_detail.dta"

// NBER shipments file; only the year-2007 rows are kept further down.
global SHIPMENTS "$buildpath/input/shipments/NBER_shipments_naics1997_1958-2011.dta"

// Energy intensities, created in 3_2_gen_energyintensities.do.
global ENERGY_I "$EI/energy_intensity_2007_naics2007.dta"

// ---------------------------- Outputs ---------------------------
// Full IO table merged with intensities.
global ENERGY_I_IO "$IO/IO_energyintensity.dta"
// One row per naics with direct and total intensity.
global NAICS_INTENSITY "$EI/naics_intensity_comp_2007.dta"

*************************************************************************
* 				PART II. Define functions
*************************************************************************

// Generate the naics_level and naics2-naics6 variables
program gen_naicsX_vars
    // Works on the dataset in memory, which must hold a numeric variable -naics-.
    // Creates:
    //   naics_level      number of characters in the string form of -naics-
    //   naics6 .. naics2 the code cut down to that many digits; left missing
    //                    when the code has fewer digits than the target, or
    //                    when naics_level is outside 2..6.
    tempvar code_text
    quietly tostring naics, generate(`code_text')
    generate byte naics_level = length(`code_text')

    quietly {
        // compress *before* creating the naicsK variables so they all stay long
        compress

        // Build naics6 first and naics2 last (same variable order as before).
        forvalues target = 6(-1)2 {
            // A code that is already `target' digits long is used as-is.
            generate long naics`target' = naics if naics_level == `target'

            // Longer codes lose their trailing digits. For target == 6 the
            // range 7/6 is empty, so this inner loop does not run.
            local first_longer = `target' + 1
            forvalues digits = `first_longer'/6 {
                replace naics`target' = floor(naics / 10^(`digits' - `target')) ///
                    if naics_level == `digits'
            }
        }
    }
end


program find_match
    // Match IO-table row codes to shipment records at one NAICS digit level.
    //
    // Arguments (positional):
    //   level_to_match  integer K; rowNaics is matched against naicsK
    //   to_save         path the matched data are saved to (saved without
    //                   -replace-, so the file must not exist yet)
    //   row_code        path of a dataset holding rowCode and rowNaics,
    //                   with rowNaics unique (it is the "1" side of 1:m)
    //   ship_longvars   path of a dataset holding naics, vship and naicsK
    //
    // Only matched observations are kept. The result has the variables
    // rowCode rowNaics naics vship; it is saved to to_save and is also
    // left in memory. The data previously in memory are discarded.
    args level_to_match to_save row_code ship_longvars
    confirm integer number `level_to_match'
    confirm file "`row_code'"
    confirm file "`ship_longvars'"

    // Quote the path: temporary-file paths can contain spaces.
    use "`row_code'", clear

    // Temporarily give the key the same name it has in the shipments data.
    rename rowNaics naics`level_to_match'
    merge 1:m naics`level_to_match' using "`ship_longvars'", ///
        keep(match) nogenerate noreport
    rename naics`level_to_match' rowNaics

    keep rowCode rowNaics naics vship
    quietly save "`to_save'"
end

*************************************************************************
* 				PART III. merge data and generate prices
*************************************************************************
/////////////////////////////////////////////////////////
//STEP 1: Open IO table, and get it down to NAICS 6 levels
/////////////////////////////////////////////////////////

//Load the IO table, keep only manufacturing (row AND column codes beginning
//with "3"), and save that.
//All tempfile paths below are quoted because they can contain spaces.
	tempfile manuf_io row_code ship ship_longvars colcode_naics ///
				match_6 match_5 match_4 match_3 code_naics weights
	use "$INPUT_OUTPUT_DATA", clear
	keep if substr(rowCode, 1, 1) == "3" & substr(colCode, 1, 1) == "3"
	//there are 288 rowNaics

	quietly save "`manuf_io'"

	//distinct (rowCode, rowNaics) pairs: the lookup handed to find_match
	keep rowCode rowNaics
	quietly duplicates drop
	quietly save "`row_code'"

//Now I want to bring in the shipping file, clean it down to its bare bones,
//and then just keep the naics I want
	use "$SHIPMENTS", clear
	quietly compress
	keep if year==2007
	keep naics vship
	destring naics, replace
	//473
	//NOTE(review): nothing visible in this file reads `ship' again
	quietly save "`ship'"
	gen_naicsX_vars
	quietly save "`ship_longvars'"

//Now, match the IO row codes to shipment naics at each digit level
	find_match 6 "`match_6'" "`row_code'" "`ship_longvars'"
	find_match 5 "`match_5'" "`row_code'" "`ship_longvars'"
	find_match 4 "`match_4'" "`row_code'" "`ship_longvars'"
	find_match 3 "`match_3'" "`row_code'" "`ship_longvars'"

	//stack the four match sets; load match_3 explicitly instead of relying
	//on whatever the last find_match call left in memory
	use "`match_3'", clear
	append using "`match_4'"
	append using "`match_5'"
	append using "`match_6'"
	quietly save "`code_naics'"

	//same crosswalk, renamed for the column side of the IO table
	rename rowCode colCode
	rename rowNaics colNaics
	drop vship
	quietly save "`colcode_naics'"
	//down to 472, for some reason, IO table does not have 339111

//Now we create the weights: each naics' share of the shipments of its rowCode
	use "`code_naics'", clear
	bysort rowCode: egen codeShip = total(vship)
	gen weight = vship/codeShip
	drop vship codeShip
	quietly save "`weights'"

//now joinby to get the full IO table
	use "`manuf_io'", clear
	//rows: one copy of each IO row per naics mapped to its rowNaics
	joinby rowNaics using "`weights'"
	rename naics naics_row
	//columns: one copy per naics mapped to its colNaics (adds -naics-)
	joinby colNaics using "`colcode_naics'"
	//the number of rows should be 472^2

//Now multiply v by the weight and you did it!
	//Take the 1 off the code-level diagonal before weighting, then put a 1
	//back on the naics-level diagonal.
	gen v_diag_protect = v
	replace v_diag_protect = v_diag_protect-1 if rowNaics==colNaics
	gen v_weighted = v_diag_protect*weight
	replace v_weighted = v_weighted+1 if naics_row==naics
	drop colShare
	tempfile io_cleaned naics_names //naics_merge1 naics_merge2
	quietly save "`io_cleaned'"

	//list of (naics_row, rowDescription) pairs
	//NOTE(review): nothing visible in this file reads `naics_names' again
	collapse (count) v, by (naics_row rowDescription)
	drop v
	quietly save "`naics_names'"

/////////////////////////////////////////////////////////
//STEP 2: Get intensities, both for their own sake and for price calc
/////////////////////////////////////////////////////////

		
	//back to intensity
	//All tempfile paths below are quoted because they can contain spaces.
	use "$ENERGY_I", clear
	rename industryname2007 industrylabel
	keep naics industrylabel intensity

	//two copies of the same data: `intensity2' keyed on naics (merged after
	//the collapse), `intensity' keyed on naics_row (merged onto the IO rows)
	tempfile intensity intensity2
	quietly save "`intensity2'"
	rename naics naics_row
	quietly save "`intensity'"

	//great, merge io table with intensities, multiply together, then for each
	//column normalize to 1. This is a step to get our indirect prices.
	//NOTE(review): `io_intensity' is declared but not used in the visible code
	tempfile io_intensity total_intensity

	use "`io_cleaned'", clear
	merge m:1 naics_row using "`intensity'", keep(match) nogenerate noreport

	//now, multiply the io value by the row industry's intensity
	gen io_v = v_weighted*intensity
	//sum and normalize across each column (column industry = naics)
	bysort naics: egen io_v_tot= total(io_v)
	gen io_v_norm = io_v/io_v_tot

	compress
	quietly save "$ENERGY_I_IO", replace

	preserve
		* lighter version used in sims
		keep naics_row naics io_v_norm
		save "$IO/IO_energyintensity_lite.dta", replace
	restore


	//get the (pre normalized) intensity measures: column sums of io_v
	collapse (sum) total_intensity = io_v, by(naics)

	//naics is unique after the collapse, so this is a 1:1 merge; naics that
	//appear on only one side are kept
	merge 1:1 naics using "`intensity2'", nogen noreport

	order naics total_intensity intensity industrylabel

	//the dollar sign is escaped so it can never be read as a global macro
	label var total_intensity "million BTU/ \$000 shipment"
	label var intensity "million BTU/ \$000 shipment"
	quietly save "`total_intensity'"

	// merge in naics2012 code
	preserve
		use "$ENERGY_I", clear
		keep naics naics2012
		tempfile naicscode_ei
		save "`naicscode_ei'", replace
	restore
	merge 1:1 naics using "`naicscode_ei'", nogenerate
	order naics naics2012 industrylabel total_intensity intensity
	quietly save "$NAICS_INTENSITY", replace
	
